In [1]:
# !pip install git+https://github.com/alberanid/imdbpy
# !pip install pandas
# !pip install numpy
# !pip install matplotlib
# !pip install seaborn
# !pip install pandas_profiling --upgrade
# !pip install plotly
# !pip install wordcloud
# !pip install Flask
In [2]:
# Import Dataset
# Import File from Loacal Drive
# from google.colab import files
# data_to_load = files.upload()
# from google.colab import drive
# drive.mount('/content/drive')
In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import collections
import plotly.express as px
import plotly.graph_objects as go
import nltk
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
from nltk.util import ngrams
from plotly.subplots import make_subplots
from plotly.offline import iplot, init_notebook_mode
from wordcloud import WordCloud, STOPWORDS
from pandas_profiling import ProfileReport
%matplotlib inline
warnings.filterwarnings("ignore")
In [4]:
nltk.download('all')
[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package abc is already up-to-date!
[nltk_data]    | Downloading package alpino to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package alpino is already up-to-date!
[nltk_data]    | Downloading package biocreative_ppi to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package biocreative_ppi is already up-to-date!
[nltk_data]    | Downloading package brown to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package brown is already up-to-date!
[nltk_data]    | Downloading package brown_tei to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package brown_tei is already up-to-date!
[nltk_data]    | Downloading package cess_cat to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cess_cat is already up-to-date!
[nltk_data]    | Downloading package cess_esp to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cess_esp is already up-to-date!
[nltk_data]    | Downloading package chat80 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package chat80 is already up-to-date!
[nltk_data]    | Downloading package city_database to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package city_database is already up-to-date!
[nltk_data]    | Downloading package cmudict to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cmudict is already up-to-date!
[nltk_data]    | Downloading package comparative_sentences to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package comparative_sentences is already up-to-
[nltk_data]    |       date!
[nltk_data]    | Downloading package comtrans to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package comtrans is already up-to-date!
[nltk_data]    | Downloading package conll2000 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package conll2000 is already up-to-date!
[nltk_data]    | Downloading package conll2002 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package conll2002 is already up-to-date!
[nltk_data]    | Downloading package conll2007 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package conll2007 is already up-to-date!
[nltk_data]    | Downloading package crubadan to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package crubadan is already up-to-date!
[nltk_data]    | Downloading package dependency_treebank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package dependency_treebank is already up-to-date!
[nltk_data]    | Downloading package dolch to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package dolch is already up-to-date!
[nltk_data]    | Downloading package europarl_raw to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package europarl_raw is already up-to-date!
[nltk_data]    | Downloading package floresta to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package floresta is already up-to-date!
[nltk_data]    | Downloading package framenet_v15 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package framenet_v15 is already up-to-date!
[nltk_data]    | Downloading package framenet_v17 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package framenet_v17 is already up-to-date!
[nltk_data]    | Downloading package gazetteers to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package gazetteers is already up-to-date!
[nltk_data]    | Downloading package genesis to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package genesis is already up-to-date!
[nltk_data]    | Downloading package gutenberg to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package gutenberg is already up-to-date!
[nltk_data]    | Downloading package ieer to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ieer is already up-to-date!
[nltk_data]    | Downloading package inaugural to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package inaugural is already up-to-date!
[nltk_data]    | Downloading package indian to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package indian is already up-to-date!
[nltk_data]    | Downloading package jeita to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package jeita is already up-to-date!
[nltk_data]    | Downloading package kimmo to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package kimmo is already up-to-date!
[nltk_data]    | Downloading package knbc to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package knbc is already up-to-date!
[nltk_data]    | Downloading package lin_thesaurus to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package lin_thesaurus is already up-to-date!
[nltk_data]    | Downloading package mac_morpho to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package mac_morpho is already up-to-date!
[nltk_data]    | Downloading package machado to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package machado is already up-to-date!
[nltk_data]    | Downloading package masc_tagged to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package masc_tagged is already up-to-date!
[nltk_data]    | Downloading package moses_sample to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package moses_sample is already up-to-date!
[nltk_data]    | Downloading package movie_reviews to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package movie_reviews is already up-to-date!
[nltk_data]    | Downloading package names to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package names is already up-to-date!
[nltk_data]    | Downloading package nombank.1.0 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package nombank.1.0 is already up-to-date!
[nltk_data]    | Downloading package nps_chat to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package nps_chat is already up-to-date!
[nltk_data]    | Downloading package omw to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package omw is already up-to-date!
[nltk_data]    | Downloading package opinion_lexicon to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package opinion_lexicon is already up-to-date!
[nltk_data]    | Downloading package paradigms to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package paradigms is already up-to-date!
[nltk_data]    | Downloading package pil to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package pil is already up-to-date!
[nltk_data]    | Downloading package pl196x to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package pl196x is already up-to-date!
[nltk_data]    | Downloading package ppattach to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ppattach is already up-to-date!
[nltk_data]    | Downloading package problem_reports to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package problem_reports is already up-to-date!
[nltk_data]    | Downloading package propbank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package propbank is already up-to-date!
[nltk_data]    | Downloading package ptb to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ptb is already up-to-date!
[nltk_data]    | Downloading package product_reviews_1 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package product_reviews_1 is already up-to-date!
[nltk_data]    | Downloading package product_reviews_2 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package product_reviews_2 is already up-to-date!
[nltk_data]    | Downloading package pros_cons to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package pros_cons is already up-to-date!
[nltk_data]    | Downloading package qc to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package qc is already up-to-date!
[nltk_data]    | Downloading package reuters to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package reuters is already up-to-date!
[nltk_data]    | Downloading package rte to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package rte is already up-to-date!
[nltk_data]    | Downloading package semcor to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package semcor is already up-to-date!
[nltk_data]    | Downloading package senseval to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package senseval is already up-to-date!
[nltk_data]    | Downloading package sentiwordnet to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sentiwordnet is already up-to-date!
[nltk_data]    | Downloading package sentence_polarity to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sentence_polarity is already up-to-date!
[nltk_data]    | Downloading package shakespeare to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package shakespeare is already up-to-date!
[nltk_data]    | Downloading package sinica_treebank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sinica_treebank is already up-to-date!
[nltk_data]    | Downloading package smultron to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package smultron is already up-to-date!
[nltk_data]    | Downloading package state_union to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package state_union is already up-to-date!
[nltk_data]    | Downloading package stopwords to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package stopwords is already up-to-date!
[nltk_data]    | Downloading package subjectivity to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package subjectivity is already up-to-date!
[nltk_data]    | Downloading package swadesh to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package swadesh is already up-to-date!
[nltk_data]    | Downloading package switchboard to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package switchboard is already up-to-date!
[nltk_data]    | Downloading package timit to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package timit is already up-to-date!
[nltk_data]    | Downloading package toolbox to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package toolbox is already up-to-date!
[nltk_data]    | Downloading package treebank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package treebank is already up-to-date!
[nltk_data]    | Downloading package twitter_samples to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package twitter_samples is already up-to-date!
[nltk_data]    | Downloading package udhr to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package udhr is already up-to-date!
[nltk_data]    | Downloading package udhr2 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package udhr2 is already up-to-date!
[nltk_data]    | Downloading package unicode_samples to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package unicode_samples is already up-to-date!
[nltk_data]    | Downloading package universal_treebanks_v20 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package universal_treebanks_v20 is already up-to-
[nltk_data]    |       date!
[nltk_data]    | Downloading package verbnet to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package verbnet is already up-to-date!
[nltk_data]    | Downloading package verbnet3 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package verbnet3 is already up-to-date!
[nltk_data]    | Downloading package webtext to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package webtext is already up-to-date!
[nltk_data]    | Downloading package wordnet to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package wordnet is already up-to-date!
[nltk_data]    | Downloading package wordnet_ic to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package wordnet_ic is already up-to-date!
[nltk_data]    | Downloading package words to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package words is already up-to-date!
[nltk_data]    | Downloading package ycoe to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ycoe is already up-to-date!
[nltk_data]    | Downloading package rslp to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package rslp is already up-to-date!
[nltk_data]    | Downloading package maxent_treebank_pos_tagger to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package maxent_treebank_pos_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package universal_tagset to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package universal_tagset is already up-to-date!
[nltk_data]    | Downloading package maxent_ne_chunker to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package maxent_ne_chunker is already up-to-date!
[nltk_data]    | Downloading package punkt to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package punkt is already up-to-date!
[nltk_data]    | Downloading package book_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package book_grammars is already up-to-date!
[nltk_data]    | Downloading package sample_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sample_grammars is already up-to-date!
[nltk_data]    | Downloading package spanish_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package spanish_grammars is already up-to-date!
[nltk_data]    | Downloading package basque_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package basque_grammars is already up-to-date!
[nltk_data]    | Downloading package large_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package large_grammars is already up-to-date!
[nltk_data]    | Downloading package tagsets to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package tagsets is already up-to-date!
[nltk_data]    | Downloading package snowball_data to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package snowball_data is already up-to-date!
[nltk_data]    | Downloading package bllip_wsj_no_aux to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package bllip_wsj_no_aux is already up-to-date!
[nltk_data]    | Downloading package word2vec_sample to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package word2vec_sample is already up-to-date!
[nltk_data]    | Downloading package panlex_swadesh to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package panlex_swadesh is already up-to-date!
[nltk_data]    | Downloading package mte_teip5 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package mte_teip5 is already up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger_ru is already
[nltk_data]    |       up-to-date!
[nltk_data]    | Downloading package perluniprops to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package perluniprops is already up-to-date!
[nltk_data]    | Downloading package nonbreaking_prefixes to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package nonbreaking_prefixes is already up-to-date!
[nltk_data]    | Downloading package vader_lexicon to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package vader_lexicon is already up-to-date!
[nltk_data]    | Downloading package porter_test to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package porter_test is already up-to-date!
[nltk_data]    | Downloading package wmt15_eval to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package wmt15_eval is already up-to-date!
[nltk_data]    | Downloading package mwa_ppdb to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package mwa_ppdb is already up-to-date!
[nltk_data]    | 
[nltk_data]  Done downloading collection all
Out[4]:
True
In [5]:
# path = '/content/drive/MyDrive/Files/'

path = 'C:\\Users\\pawan\\OneDrive\\Desktop\\ott\\Data\\'
 
df_movies = pd.read_csv(path + 'ottmovies.csv')
 
df_movies.head()
Out[5]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type
0 1 Inception 2010 13+ 8.8 87% Christopher Nolan Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot ... Action,Adventure,Sci-Fi,Thriller United States,United Kingdom English,Japanese,French Dom Cobb is a skilled thief, the absolute best... 148.0 movie NaN 1 0 0 0 0
1 2 The Matrix 1999 16+ 8.7 88% Lana Wachowski,Lilly Wachowski Keanu Reeves,Laurence Fishburne,Carrie-Anne Mo... Action,Sci-Fi United States English Thomas A. Anderson is a man living two lives. ... 136.0 movie NaN 1 0 0 0 0
2 3 Avengers: Infinity War 2018 13+ 8.4 85% Anthony Russo,Joe Russo Robert Downey Jr.,Chris Hemsworth,Mark Ruffalo... Action,Adventure,Sci-Fi United States English As the Avengers and their allies have continue... 149.0 movie NaN 1 0 0 0 0
3 4 Back to the Future 1985 7+ 8.5 96% Robert Zemeckis Michael J. Fox,Christopher Lloyd,Lea Thompson,... Adventure,Comedy,Sci-Fi United States English Marty McFly, a typical American teenager of th... 116.0 movie NaN 1 0 0 0 0
4 5 The Good, the Bad and the Ugly 1966 16+ 8.8 97% Sergio Leone Eli Wallach,Clint Eastwood,Lee Van Cleef,Aldo ... Western Italy,Spain,West Germany,United States Italian Blondie (The Good) (Clint Eastwood) is a profe... 161.0 movie NaN 1 0 1 0 0
In [6]:
# profile = ProfileReport(df_movies)
# profile
In [7]:
def data_investigate(df):
    print('No of Rows : ', df.shape[0])
    print('No of Coloums : ', df.shape[1])
    print('**'*25)
    print('Colums Names : \n', df.columns)
    print('**'*25)
    print('Datatype of Columns : \n', df.dtypes)
    print('**'*25)
    print('Missing Values : ')
    c = df.isnull().sum()
    c = c[c > 0]
    print(c)
    print('**'*25)
    print('Missing vaules %age wise :\n')
    print((100*(df.isnull().sum()/len(df.index))))
    print('**'*25)
    print('Pictorial Representation : ')
    plt.figure(figsize = (10, 10))
    sns.heatmap(df.isnull(), yticklabels = False, cbar = False)
    plt.show()
In [8]:
data_investigate(df_movies)
No of Rows :  16923
No of Coloums :  20
**************************************************
Colums Names : 
 Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
       'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
       'Seasons', 'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type'],
      dtype='object')
**************************************************
Datatype of Columns : 
 ID                   int64
Title               object
Year                 int64
Age                 object
IMDb               float64
Rotten Tomatoes     object
Directors           object
Cast                object
Genres              object
Country             object
Language            object
Plotline            object
Runtime            float64
Kind                object
Seasons            float64
Netflix              int64
Hulu                 int64
Prime Video          int64
Disney+              int64
Type                 int64
dtype: object
**************************************************
Missing Values : 
Age                 8457
IMDb                 328
Rotten Tomatoes    10437
Directors            357
Cast                 648
Genres               234
Country              303
Language             437
Plotline            4958
Runtime              382
Seasons            16923
dtype: int64
**************************************************
Missing vaules %age wise :

ID                   0.000000
Title                0.000000
Year                 0.000000
Age                 49.973409
IMDb                 1.938191
Rotten Tomatoes     61.673462
Directors            2.109555
Cast                 3.829108
Genres               1.382734
Country              1.790463
Language             2.582284
Plotline            29.297406
Runtime              2.257283
Kind                 0.000000
Seasons            100.000000
Netflix              0.000000
Hulu                 0.000000
Prime Video          0.000000
Disney+              0.000000
Type                 0.000000
dtype: float64
**************************************************
Pictorial Representation : 
In [9]:
# ID
# df_movies = df_movies.drop(['ID'], axis = 1)
 
# Age
df_movies.loc[df_movies['Age'].isnull() & df_movies['Disney+'] == 1, "Age"] = '13'
# df_movies.fillna({'Age' : 18}, inplace = True)
df_movies.fillna({'Age' : 'NR'}, inplace = True)
df_movies['Age'].replace({'all': '0'}, inplace = True)
df_movies['Age'].replace({'7+': '7'}, inplace = True)
df_movies['Age'].replace({'13+': '13'}, inplace = True)
df_movies['Age'].replace({'16+': '16'}, inplace = True)
df_movies['Age'].replace({'18+': '18'}, inplace = True)
# df_movies['Age'] = df_movies['Age'].astype(int)
 
# IMDb
# df_movies.fillna({'IMDb' : df_movies['IMDb'].mean()}, inplace = True)
# df_movies.fillna({'IMDb' : df_movies['IMDb'].median()}, inplace = True)
df_movies.fillna({'IMDb' : "NA"}, inplace = True)
 
# Rotten Tomatoes
df_movies['Rotten Tomatoes'] = df_movies['Rotten Tomatoes'][df_movies['Rotten Tomatoes'].notnull()].str.replace('%', '').astype(int)
# df_movies['Rotten Tomatoes'] = df_movies['Rotten Tomatoes'][df_movies['Rotten Tomatoes'].notnull()].astype(int)
# df_movies.fillna({'Rotten Tomatoes' : df_movies['Rotten Tomatoes'].mean()}, inplace = True)
# df_movies.fillna({'Rotten Tomatoes' : df_movies['Rotten Tomatoes'].median()}, inplace = True)
# df_movies['Rotten Tomatoes'] = df_movies['Rotten Tomatoes'].astype(int)
df_movies.fillna({'Rotten Tomatoes' : "NA"}, inplace = True)
 
# Directors
# df_movies = df_movies.drop(['Directors'], axis = 1)
df_movies.fillna({'Directors' : "NA"}, inplace = True)
 
# Cast
df_movies.fillna({'Cast' : "NA"}, inplace = True)
 
# Genres
df_movies.fillna({'Genres': "NA"}, inplace = True)
 
# Country
df_movies.fillna({'Country': "NA"}, inplace = True)
 
# Language
df_movies.fillna({'Language': "NA"}, inplace = True)
 
# Plotline
df_movies.fillna({'Plotline': "NA"}, inplace = True)
 
# Runtime
# df_movies.fillna({'Runtime' : df_movies['Runtime'].mean()}, inplace = True)
# df_movies['Runtime'] = df_movies['Runtime'].astype(int)
df_movies.fillna({'Runtime' : "NA"}, inplace = True)
 
# Kind
# df_movies.fillna({'Kind': "NA"}, inplace = True)
 
# Type
# df_movies.fillna({'Type': "NA"}, inplace = True)
# df_movies = df_movies.drop(['Type'], axis = 1)
 
# Seasons
# df_movies.fillna({'Seasons': 1}, inplace = True)
# df_movies.fillna({'Seasons': "NA"}, inplace = True)
df_movies = df_movies.drop(['Seasons'], axis = 1)
# df_movies['Seasons'] = df_movies['Seasons'].astype(int)
# df_movies.fillna({'Seasons' : df_movies['Seasons'].mean()}, inplace = True)
# df_movies['Seasons'] = df_movies['Seasons'].astype(int)
 
# Service Provider
df_movies['Service Provider'] = df_movies.loc[:, ['Netflix', 'Prime Video', 'Disney+', 'Hulu']].idxmax(axis = 1)
# df_movies.drop(['Netflix','Prime Video','Disney+','Hulu'], axis = 1)

# Removing Duplicate and Missing Entries
df_movies.dropna(how = 'any', inplace = True)
df_movies.drop_duplicates(inplace = True)
In [10]:
data_investigate(df_movies)
No of Rows :  16923
No of Coloums :  20
**************************************************
Colums Names : 
 Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
       'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
       'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type',
       'Service Provider'],
      dtype='object')
**************************************************
Datatype of Columns : 
 ID                   int64
Title               object
Year                 int64
Age                 object
IMDb                object
Rotten Tomatoes     object
Directors           object
Cast                object
Genres              object
Country             object
Language            object
Plotline            object
Runtime             object
Kind                object
Netflix              int64
Hulu                 int64
Prime Video          int64
Disney+              int64
Type                 int64
Service Provider    object
dtype: object
**************************************************
Missing Values : 
Series([], dtype: int64)
**************************************************
Missing vaules %age wise :

ID                  0.0
Title               0.0
Year                0.0
Age                 0.0
IMDb                0.0
Rotten Tomatoes     0.0
Directors           0.0
Cast                0.0
Genres              0.0
Country             0.0
Language            0.0
Plotline            0.0
Runtime             0.0
Kind                0.0
Netflix             0.0
Hulu                0.0
Prime Video         0.0
Disney+             0.0
Type                0.0
Service Provider    0.0
dtype: float64
**************************************************
Pictorial Representation : 
In [11]:
df_movies.head()
Out[11]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 1 Inception 2010 13 8.8 87 Christopher Nolan Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot ... Action,Adventure,Sci-Fi,Thriller United States,United Kingdom English,Japanese,French Dom Cobb is a skilled thief, the absolute best... 148 movie 1 0 0 0 0 Netflix
1 2 The Matrix 1999 16 8.7 88 Lana Wachowski,Lilly Wachowski Keanu Reeves,Laurence Fishburne,Carrie-Anne Mo... Action,Sci-Fi United States English Thomas A. Anderson is a man living two lives. ... 136 movie 1 0 0 0 0 Netflix
2 3 Avengers: Infinity War 2018 13 8.4 85 Anthony Russo,Joe Russo Robert Downey Jr.,Chris Hemsworth,Mark Ruffalo... Action,Adventure,Sci-Fi United States English As the Avengers and their allies have continue... 149 movie 1 0 0 0 0 Netflix
3 4 Back to the Future 1985 7 8.5 96 Robert Zemeckis Michael J. Fox,Christopher Lloyd,Lea Thompson,... Adventure,Comedy,Sci-Fi United States English Marty McFly, a typical American teenager of th... 116 movie 1 0 0 0 0 Netflix
4 5 The Good, the Bad and the Ugly 1966 16 8.8 97 Sergio Leone Eli Wallach,Clint Eastwood,Lee Van Cleef,Aldo ... Western Italy,Spain,West Germany,United States Italian Blondie (The Good) (Clint Eastwood) is a profe... 161 movie 1 0 1 0 0 Netflix
In [12]:
df_movies.describe()
Out[12]:
ID Year Netflix Hulu Prime Video Disney+ Type
count 16923.000000 16923.000000 16923.000000 16923.000000 16923.000000 16923.000000 16923.0
mean 8462.000000 2003.211901 0.214915 0.062637 0.727235 0.033150 0.0
std 4885.393638 20.526532 0.410775 0.242315 0.445394 0.179034 0.0
min 1.000000 1901.000000 0.000000 0.000000 0.000000 0.000000 0.0
25% 4231.500000 2001.000000 0.000000 0.000000 0.000000 0.000000 0.0
50% 8462.000000 2012.000000 0.000000 0.000000 1.000000 0.000000 0.0
75% 12692.500000 2016.000000 0.000000 0.000000 1.000000 0.000000 0.0
max 16923.000000 2020.000000 1.000000 1.000000 1.000000 1.000000 0.0
In [13]:
df_movies.corr()
Out[13]:
ID Year Netflix Hulu Prime Video Disney+ Type
ID 1.000000 -0.217816 -0.644470 -0.129926 0.469301 0.263530 NaN
Year -0.217816 1.000000 0.256151 0.101337 -0.255578 -0.047258 NaN
Netflix -0.644470 0.256151 1.000000 -0.118032 -0.745141 -0.089649 NaN
Hulu -0.129926 0.101337 -0.118032 1.000000 -0.284654 -0.039693 NaN
Prime Video 0.469301 -0.255578 -0.745141 -0.284654 1.000000 -0.289008 NaN
Disney+ 0.263530 -0.047258 -0.089649 -0.039693 -0.289008 1.000000 NaN
Type NaN NaN NaN NaN NaN NaN NaN
In [14]:
# df_movies.sort_values('Year', ascending = True)
# df_movies.sort_values('IMDb', ascending = False)
In [15]:
# df_movies.to_csv(path_or_buf= '/content/drive/MyDrive/Files/updated_ottmovies.csv', index = False)
 
# path = '/content/drive/MyDrive/Files/'
 
# udf_movies = pd.read_csv(path + 'updated_ottmovies.csv')
 
# udf_movies
In [16]:
# df_netflix_movies = df_movies.loc[(df_movies['Netflix'] > 0)]
# df_hulu_movies = df_movies.loc[(df_movies['Hulu'] > 0)]
# df_prime_video_movies = df_movies.loc[(df_movies['Prime Video'] > 0)]
# df_disney_movies = df_movies.loc[(df_movies['Disney+'] > 0)]
In [17]:
df_netflix_only_movies = df_movies[(df_movies['Netflix'] == 1) & (df_movies['Hulu'] == 0) & (df_movies['Prime Video'] == 0 ) & (df_movies['Disney+'] == 0)]
df_hulu_only_movies = df_movies[(df_movies['Netflix'] == 0) & (df_movies['Hulu'] == 1) & (df_movies['Prime Video'] == 0 ) & (df_movies['Disney+'] == 0)]
df_prime_video_only_movies = df_movies[(df_movies['Netflix'] == 0) & (df_movies['Hulu'] == 0) & (df_movies['Prime Video'] == 1 ) & (df_movies['Disney+'] == 0)]
df_disney_only_movies = df_movies[(df_movies['Netflix'] == 0) & (df_movies['Hulu'] == 0) & (df_movies['Prime Video'] == 0 ) & (df_movies['Disney+'] == 1)]
In [18]:
df_movies_directors = df_movies.copy()
In [19]:
df_movies_directors.drop(df_movies_directors.loc[df_movies_directors['Directors'] == "NA"].index, inplace = True)
# df_movies_directors = df_movies_directors[df_movies_directors.Director != "NA"]
# df_movies_directors['Director'] = df_movies_directors['Director'].astype(str)
In [20]:
df_movies_count_directors = df_movies_directors.copy()
In [21]:
df_movies_director = df_movies_directors.copy()
In [22]:
# Create directors dict where key=name and value = number of directors
 
directors = {}
 
for i in df_movies_count_directors['Directors'].dropna():
    if i != "NA":
        #print(i,len(i.split(',')))
        directors[i] = len(i.split(','))
    else:
        directors[i] = 0
    
# Add this information to our dataframe as a new column
 
df_movies_count_directors['Number of Directors'] = df_movies_count_directors['Directors'].map(directors).astype(int)
In [23]:
df_movies_mixed_directors = df_movies_count_directors.copy()
In [24]:
# Creating distinct dataframes only with the movies present on individual streaming platforms
netflix_directors_movies = df_movies_count_directors.loc[df_movies_count_directors['Netflix'] == 1]
hulu_directors_movies = df_movies_count_directors.loc[df_movies_count_directors['Hulu'] == 1]
prime_video_directors_movies = df_movies_count_directors.loc[df_movies_count_directors['Prime Video'] == 1]
disney_directors_movies = df_movies_count_directors.loc[df_movies_count_directors['Disney+'] == 1]
In [25]:
plt.figure(figsize = (10, 10))
corr = df_movies_count_directors.corr()
# Plot figsize
fig, ax = plt.subplots(figsize=(10, 8))
# Generate Heat Map, alleast annotations and place floats in map
sns.heatmap(corr, cmap = 'magma', annot = True, fmt = ".2f")
# Apply xticks
plt.xticks(range(len(corr.columns)), corr.columns);
# Apply yticks
plt.yticks(range(len(corr.columns)), corr.columns)
# show plot
plt.show()
fig.show()
<Figure size 720x720 with 0 Axes>
In [26]:
df_directors_most_movies = df_movies_count_directors.sort_values(by = 'Number of Directors', ascending = False).reset_index()
df_directors_most_movies = df_directors_most_movies.drop(['index'], axis = 1)
# filter = (df_movies_count_directors['Number of Directors'] == (df_movies_count_directors['Number of Directors'].max()))
# df_directors_most_movies = df_movies_count_directors[filter]
 
# mostest_rated_movies = df_movies_count_directors.loc[df_movies_count_directors['Number of Directors'].idxmax()]
 
print('\nMovies with Highest Ever Number of Directors are : \n')
df_directors_most_movies.head(5)
Movies with Highest Ever Number of Directors are : 

Out[26]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Number of Directors
0 11086 The Owner 2012 NR 6.9 NA Xavier Agudo,Ian Bonner,Michael Canzoniero,Fra... Jorge Mario Agudelo,Chiraz Aich,Christine Altm... Drama United States,Germany ... NA 94 movie 0 0 1 0 0 Prime Video 28
1 12269 Fun Size Horror: Volume One 2015 NR 4.6 NA Bryan Chojnowski,Lisa J Dooley,Ned Ehrbar,Mali... Tara Perry,Aidan Flynn,Guy Perry,Nev Scharrel,... Horror United States ... Alienated by her peers as a young girl, Scarle... 86 movie 0 0 1 0 0 Prime Video 18
2 11869 A Taste of Phobia 2018 NR 3.1 NA Domiziano Cristopharo,Jason Impey,Sunny King,S... Lianne O'Shea,Kehinde Bankole,Roberta Gemma,Ma... Horror United Kingdom ... Blending drama with the explanations of passio... 90 movie 0 0 1 0 0 Prime Video 17
3 4422 The Proposition 2005 16 4.3 85 Elizabeth Banks,Steven Brill,Steve Carr,Rusty ... Dennis Quaid,Greg Kinnear,Common,Charlie Saxto... Comedy United States ... Ineffectual, 'has-been' film-maker (Dennis Qua... 94 movie 0 0 1 0 0 Prime Video 13
4 2267 Truth or Dare 2017 16 4.3 NA Elizabeth Banks,Steven Brill,Steve Carr,Rusty ... Dennis Quaid,Greg Kinnear,Common,Charlie Saxto... Comedy United States ... Ineffectual, 'has-been' film-maker (Dennis Qua... 94 movie 1 0 1 0 0 Netflix 13

5 rows × 21 columns

In [27]:
fig = px.bar(y = df_directors_most_movies['Title'][:15],
             x = df_directors_most_movies['Number of Directors'][:15], 
             color = df_directors_most_movies['Number of Directors'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Number of Directors'},
             title  = 'Movies with Highest Number of Directors : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [28]:
df_directors_least_movies = df_movies_count_directors.sort_values(by = 'Number of Directors', ascending = True).reset_index()
df_directors_least_movies = df_directors_least_movies.drop(['index'], axis = 1)
# filter = (df_movies_count_directors['Number of Directors'] == (df_movies_count_directors['Number of Directors'].min()))
# df_directors_least_movies = df_movies_count_directors[filter]

print('\nMovies with Lowest Ever Number of Directors are : \n')
df_directors_least_movies.head(5)
Movies with Lowest Ever Number of Directors are : 

Out[28]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Number of Directors
0 1 Inception 2010 13 8.8 87 Christopher Nolan Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot ... Action,Adventure,Sci-Fi,Thriller United States,United Kingdom ... Dom Cobb is a skilled thief, the absolute best... 148 movie 1 0 0 0 0 Netflix 1
1 10899 Secret Mission 1942 NR 5.5 NA Harold French Hugh Williams,Carla Lehmann,James Mason,Roland... Drama,Thriller,War United Kingdom ... 40 Nights is the first of the QUEST TRILOGY - ... 94 movie 0 0 1 0 0 Prime Video 1
2 10901 Her Side of the Bed 2018 NR 4.1 NA Bryn Woznicki Chelsea Morgan,Bryn Woznicki,Kissyc Alonso,Ada... Comedy,Drama,Romance United States ... 'The Count of Monte Cristo' is an adaptation o... 97 movie 0 0 1 0 0 Prime Video 1
3 10902 Kyle Cease: Weirder. Blacker. Dimpler. 2007 NR 7 NA Craig Kelly Kyle Cease Comedy United States ... In Barefoot County, hot mama Mary Ann Hogan ru... 60 movie 0 0 1 0 0 Prime Video 1
4 10903 My Life Without Air 2017 13 7.1 NA Bojana Burnac Goran Colak,Ivan Drvis Documentary Croatia ... Handguns figure in the intertwining lives of n... 72 movie 0 0 1 0 0 Prime Video 1

5 rows × 21 columns

In [29]:
fig = px.bar(y = df_directors_least_movies['Title'][:15],
             x = df_directors_least_movies['Number of Directors'][:15], 
             color = df_directors_least_movies['Number of Directors'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Number of Directors'},
             title  = 'Movies with Lowest Number of Directors : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [30]:
print(f'''
      Total '{df_movies_count_directors['Number of Directors'].unique().shape[0]}' unique Number of Directors s were Given, They were Like this,\n
      
      {df_movies_count_directors.sort_values(by = 'Number of Directors', ascending = False)['Number of Directors'].unique()}\n
 
      The Highest Number of Directors Ever Any Movie Got is '{df_directors_most_movies['Title'][0]}' : '{df_directors_most_movies['Number of Directors'].max()}'\n
 
      The Lowest Number of Directors Ever Any Movie Got is '{df_directors_least_movies['Title'][0]}' : '{df_directors_least_movies['Number of Directors'].min()}'\n
      ''')
      Total '16' unique Number of Directors s were Given, They were Like this,

      
      [28 18 17 13 12 11 10  9  8  7  6  5  4  3  2  1]

 
      The Highest Number of Directors Ever Any Movie Got is 'The Owner' : '28'

 
      The Lowest Number of Directors Ever Any Movie Got is 'Inception' : '1'

      
In [31]:
netflix_directors_most_movies = df_directors_most_movies.loc[df_directors_most_movies['Netflix']==1].reset_index()
netflix_directors_most_movies = netflix_directors_most_movies.drop(['index'], axis = 1)
 
netflix_directors_least_movies = df_directors_least_movies.loc[df_directors_least_movies['Netflix']==1].reset_index()
netflix_directors_least_movies = netflix_directors_least_movies.drop(['index'], axis = 1)
 
netflix_directors_most_movies.head(5)
Out[31]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Number of Directors
0 2267 Truth or Dare 2017 16 4.3 NA Elizabeth Banks,Steven Brill,Steve Carr,Rusty ... Dennis Quaid,Greg Kinnear,Common,Charlie Saxto... Comedy United States ... Ineffectual, 'has-been' film-maker (Dennis Qua... 94 movie 1 0 1 0 0 Netflix 13
1 359 Veronica 2017 16 4.3 79 Elizabeth Banks,Steven Brill,Steve Carr,Rusty ... Dennis Quaid,Greg Kinnear,Common,Charlie Saxto... Comedy United States ... Ineffectual, 'has-been' film-maker (Dennis Qua... 94 movie 1 0 0 0 0 Netflix 13
2 2358 Berlin, I Love You 2019 16 4.6 11 Dianna Agron,Peter Chelsom,Claus Clausen,Ferna... Keira Knightley,Helen Mirren,Luke Wilson,Jim S... Drama,Romance Germany ... NA 120 movie 1 0 0 0 0 Netflix 13
3 2974 X: Past Is Present 2015 13 5.3 NA Hemant Gaba,Pratim D. Gupta,Sudhish Kamath,Nal... Rajat Kapoor,Radhika Apte,Bidita Bag,Piaa Bajp... Drama,Mystery,Romance India ... NA 105 movie 1 0 1 0 0 Netflix 11
4 660 Kahlil Gibran's The Prophet 2014 7 7.1 66 Roger Allers,Gaëtan Brizzi,Paul Brizzi,Joan C.... Liam Neeson,Salma Hayek,John Krasinski,Frank L... Animation,Drama Qatar,France,Lebanon,Canada,United States,Irel... ... NA 85 movie 1 0 0 0 0 Netflix 10

5 rows × 21 columns

In [32]:
fig = px.bar(y = netflix_directors_most_movies['Title'][:15],
             x = netflix_directors_most_movies['Number of Directors'][:15], 
             color = netflix_directors_most_movies['Number of Directors'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Number of Directors'},
             title  = 'Movies with Highest Number of Directors : Netflix')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [33]:
fig = px.bar(y = netflix_directors_least_movies['Title'][:15],
             x = netflix_directors_least_movies['Number of Directors'][:15], 
             color = netflix_directors_least_movies['Number of Directors'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Number of Directors'},
             title  = 'Movies with Lowest Number of Directors : Netflix')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [34]:
hulu_directors_most_movies = df_directors_most_movies.loc[df_directors_most_movies['Hulu']==1].reset_index()
hulu_directors_most_movies = hulu_directors_most_movies.drop(['index'], axis = 1)
 
hulu_directors_least_movies = df_directors_least_movies.loc[df_directors_least_movies['Hulu']==1].reset_index()
hulu_directors_least_movies = hulu_directors_least_movies.drop(['index'], axis = 1)
 
hulu_directors_most_movies.head(5)
Out[34]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Number of Directors
0 4032 Wakko's Wish 1999 0 7.3 NA Liz Holzman,Rusty Mills,Tom Ruegger,Russell Ca... Rob Paulsen,Jess Harnell,Tress MacNeille,Mauri... Animation,Adventure,Comedy,Drama,Family,Fantas... United States ... The Warner Brothers (and the Warner Sister) go... 80 movie 0 1 0 0 0 Hulu 8
1 3704 Southbound 2015 16 5.9 81 Roxanne Benjamin,Matt Bettinelli-Olpin,David B... Chad Villella,Matt Bettinelli-Olpin,Kristina P... Horror United States ... On a desolate stretch of desert highway, weary... 89 movie 0 1 1 0 0 Prime Video 8
2 3912 Tiny Toon Adventures: How I Spent My Vacation 1992 NR 8 NA Rich Arons,Ken Boyer,Kent Butterworth,Barry Ca... Charlie Adler,Tress MacNeille,Joe Alaskey,Don ... Animation,Adventure,Comedy,Family,Fantasy United States ... Term-time ends at Acme Looniversity and the Ti... 79 movie 0 1 0 0 0 Hulu 7
3 16501 Victoria's Secret Fashion Show 1999 7 7.6 NA Hamish Hamilton,Yemisi Brookes,Dee Koppang O'L... Behati Prinsloo,Adriana Lima,Alessandra Ambros... Reality-TV United States ... Shinichi Kanou is a young secluded Otaku who i... 45 movie 0 1 0 0 0 Hulu 4
4 3531 The Prince of Egypt 1998 7 7.1 80 Brenda Chapman,Steve Hickner,Simon Wells Val Kilmer,Ralph Fiennes,Michelle Pfeiffer,San... Animation,Adventure,Drama,Family,Fantasy,Musical United States,France,United Kingdom ... This is the extraordinary tale of two brothers... 99 movie 0 1 0 0 0 Hulu 3

5 rows × 21 columns

In [35]:
fig = px.bar(y = hulu_directors_most_movies['Title'][:15],
             x = hulu_directors_most_movies['Number of Directors'][:15], 
             color = hulu_directors_most_movies['Number of Directors'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Number of Directors'},
             title  = 'Movies with Highest Number of Directors : Hulu')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [36]:
fig = px.bar(y = hulu_directors_least_movies['Title'][:15],
             x = hulu_directors_least_movies['Number of Directors'][:15], 
             color = hulu_directors_least_movies['Number of Directors'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Number of Directors'},
             title  = 'Movies with Lowest Number of Directors : Hulu')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [37]:
prime_video_directors_most_movies = df_directors_most_movies.loc[df_directors_most_movies['Prime Video']==1].reset_index()
prime_video_directors_most_movies = prime_video_directors_most_movies.drop(['index'], axis = 1)
 
prime_video_directors_least_movies = df_directors_least_movies.loc[df_directors_least_movies['Prime Video']==1].reset_index()
prime_video_directors_least_movies = prime_video_directors_least_movies.drop(['index'], axis = 1)
 
prime_video_directors_most_movies.head(5)
Out[37]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Number of Directors
0 11086 The Owner 2012 NR 6.9 NA Xavier Agudo,Ian Bonner,Michael Canzoniero,Fra... Jorge Mario Agudelo,Chiraz Aich,Christine Altm... Drama United States,Germany ... NA 94 movie 0 0 1 0 0 Prime Video 28
1 12269 Fun Size Horror: Volume One 2015 NR 4.6 NA Bryan Chojnowski,Lisa J Dooley,Ned Ehrbar,Mali... Tara Perry,Aidan Flynn,Guy Perry,Nev Scharrel,... Horror United States ... Alienated by her peers as a young girl, Scarle... 86 movie 0 0 1 0 0 Prime Video 18
2 11869 A Taste of Phobia 2018 NR 3.1 NA Domiziano Cristopharo,Jason Impey,Sunny King,S... Lianne O'Shea,Kehinde Bankole,Roberta Gemma,Ma... Horror United Kingdom ... Blending drama with the explanations of passio... 90 movie 0 0 1 0 0 Prime Video 17
3 4422 The Proposition 2005 16 4.3 85 Elizabeth Banks,Steven Brill,Steve Carr,Rusty ... Dennis Quaid,Greg Kinnear,Common,Charlie Saxto... Comedy United States ... Ineffectual, 'has-been' film-maker (Dennis Qua... 94 movie 0 0 1 0 0 Prime Video 13
4 2267 Truth or Dare 2017 16 4.3 NA Elizabeth Banks,Steven Brill,Steve Carr,Rusty ... Dennis Quaid,Greg Kinnear,Common,Charlie Saxto... Comedy United States ... Ineffectual, 'has-been' film-maker (Dennis Qua... 94 movie 1 0 1 0 0 Netflix 13

5 rows × 21 columns

In [38]:
fig = px.bar(y = prime_video_directors_most_movies['Title'][:15],
             x = prime_video_directors_most_movies['Number of Directors'][:15], 
             color = prime_video_directors_most_movies['Number of Directors'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Number of Directors'},
             title  = 'Movies with Highest Number of Directors : Prime Video')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [39]:
fig = px.bar(y = prime_video_directors_least_movies['Title'][:15],
             x = prime_video_directors_least_movies['Number of Directors'][:15], 
             color = prime_video_directors_least_movies['Number of Directors'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Number of Directors'},
             title  = 'Movies with Lowest Number of Directors : Prime Video')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [40]:
disney_directors_most_movies = df_directors_most_movies.loc[df_directors_most_movies['Disney+']==1].reset_index()
disney_directors_most_movies = disney_directors_most_movies.drop(['index'], axis = 1)
 
disney_directors_least_movies = df_directors_least_movies.loc[df_directors_least_movies['Disney+']==1].reset_index()
disney_directors_least_movies = disney_directors_least_movies.drop(['index'], axis = 1)
 
disney_directors_most_movies.head(5)
Out[40]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Number of Directors
0 15786 Fantasia 1940 0 7.7 95 James Algar,Samuel Armstrong,Ford Beebe Jr.,No... Deems Taylor,Leopold Stokowski,The Philadelphi... Animation,Family,Fantasy,Music,Musical United States ... Alice, an unpretentious and individual 19-year... 125 movie 0 0 0 1 0 Disney+ 12
1 15797 Bambi 1942 0 7.3 90 James Algar,Samuel Armstrong,David Hand,Graham... Hardie Albright,Stan Alexander,Bobette Audrey,... Animation,Drama,Family United States ... When two pre-teens named Hallie and Annie meet... 70 movie 0 0 0 1 0 Disney+ 9
2 16077 Belle's Magical World 1998 0 5.3 17 Bob Kline,Cullen Blaine,Dale Case,Daniel de la... Jeff Bennett,Robby Benson,Paige O'Hara,Jim Cum... Animation,Comedy,Family,Fantasy,Musical,Romance United States ... Robbie, the master's baby, has been mysterious... 92 movie 0 0 0 1 0 Disney+ 8
3 15835 Fantasia 2000 1999 0 7.2 81 James Algar,Gaëtan Brizzi,Paul Brizzi,Hendel B... Steve Martin,Itzhak Perlman,Quincy Jones,Bette... Animation,Comedy,Family,Fantasy,Music United States ... Captain Jack Sparrow (Johnny Depp) crosses pat... 75 movie 0 0 0 1 0 Disney+ 8
4 15777 Snow White and the Seven Dwarfs 1937 0 7.6 98 William Cottrell,David Hand,Wilfred Jackson,La... Roy Atwell,Stuart Buchanan,Adriana Caselotti,E... Animation,Family,Fantasy,Musical,Romance United States ... NA 83 movie 0 0 0 1 0 Disney+ 6

5 rows × 21 columns

In [41]:
fig = px.bar(y = disney_directors_most_movies['Title'][:15],
             x = disney_directors_most_movies['Number of Directors'][:15], 
             color = disney_directors_most_movies['Number of Directors'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Number of Directors'},
             title  = 'Movies with Highest Number of Directors : Disney+')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [42]:
fig = px.bar(y = disney_directors_least_movies['Title'][:15],
             x = disney_directors_least_movies['Number of Directors'][:15], 
             color = disney_directors_least_movies['Number of Directors'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Number of Directors'},
             title  = 'Movies with Lowest Number of Directors : Disney+')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [43]:
print(f'''
      The Movie with Highest Number of Directors Ever Got is '{df_directors_most_movies['Title'][0]}' : '{df_directors_most_movies['Number of Directors'].max()}'\n
      The Movie with Lowest Number of Directors Ever Got is '{df_directors_least_movies['Title'][0]}' : '{df_directors_least_movies['Number of Directors'].min()}'\n
      
      The Movie with Highest Number of Directors on 'Netflix' is '{netflix_directors_most_movies['Title'][0]}' : '{netflix_directors_most_movies['Number of Directors'].max()}'\n
      The Movie with Lowest Number of Directors on 'Netflix' is '{netflix_directors_least_movies['Title'][0]}' : '{netflix_directors_least_movies['Number of Directors'].min()}'\n
      
      The Movie with Highest Number of Directors on 'Hulu' is '{hulu_directors_most_movies['Title'][0]}' : '{hulu_directors_most_movies['Number of Directors'].max()}'\n
      The Movie with Lowest Number of Directors on 'Hulu' is '{hulu_directors_least_movies['Title'][0]}' : '{hulu_directors_least_movies['Number of Directors'].min()}'\n
      
      The Movie with Highest Number of Directors on 'Prime Video' is '{prime_video_directors_most_movies['Title'][0]}' : '{prime_video_directors_most_movies['Number of Directors'].max()}'\n
      The Movie with Lowest Number of Directors on 'Prime Video' is '{prime_video_directors_least_movies['Title'][0]}' : '{prime_video_directors_least_movies['Number of Directors'].min()}'\n
      
      The Movie with Highest Number of Directors on 'Disney+' is '{disney_directors_most_movies['Title'][0]}' : '{disney_directors_most_movies['Number of Directors'].max()}'\n
      The Movie with Lowest Number of Directors on 'Disney+' is '{disney_directors_least_movies['Title'][0]}' : '{disney_directors_least_movies['Number of Directors'].min()}'\n 
      ''')
      The Movie with Highest Number of Directors Ever Got is 'The Owner' : '28'

      The Movie with Lowest Number of Directors Ever Got is 'Inception' : '1'

      
      The Movie with Highest Number of Directors on 'Netflix' is 'Truth or Dare' : '13'

      The Movie with Lowest Number of Directors on 'Netflix' is 'Inception' : '1'

      
      The Movie with Highest Number of Directors on 'Hulu' is 'Wakko's Wish' : '8'

      The Movie with Lowest Number of Directors on 'Hulu' is 'Home Free' : '1'

      
      The Movie with Highest Number of Directors on 'Prime Video' is 'The Owner' : '28'

      The Movie with Lowest Number of Directors on 'Prime Video' is 'Secret Mission' : '1'

      
      The Movie with Highest Number of Directors on 'Disney+' is 'Fantasia' : '12'

      The Movie with Lowest Number of Directors on 'Disney+' is 'The Swap' : '1'
 
      
In [44]:
print(f'''
      Accross All Platforms the Average Number of Directors is '{round(df_movies_count_directors['Number of Directors'].mean(), ndigits = 2)}'\n
      The Average Number of Directors on 'Netflix' is '{round(netflix_directors_movies['Number of Directors'].mean(), ndigits = 2)}'\n
      The Average Number of Directors on 'Hulu' is '{round(hulu_directors_movies['Number of Directors'].mean(), ndigits = 2)}'\n
      The Average Number of Directors on 'Prime Video' is '{round(prime_video_directors_movies['Number of Directors'].mean(), ndigits = 2)}'\n
      The Average Number of Directors on 'Disney+' is '{round(disney_directors_movies['Number of Directors'].mean(), ndigits = 2)}'\n 
      ''')
      Accross All Platforms the Average Number of Directors is '1.14'

      The Average Number of Directors on 'Netflix' is '1.15'

      The Average Number of Directors on 'Hulu' is '1.12'

      The Average Number of Directors on 'Prime Video' is '1.13'

      The Average Number of Directors on 'Disney+' is '1.34'
 
      
In [45]:
print(f'''
      Accross All Platforms Total Count of Director is '{df_movies_count_directors['Number of Directors'].max()}'\n
      Total Count of Director on 'Netflix' is '{netflix_directors_movies['Number of Directors'].max()}'\n
      Total Count of Director on 'Hulu' is '{hulu_directors_movies['Number of Directors'].max()}'\n
      Total Count of Director on 'Prime Video' is '{prime_video_directors_movies['Number of Directors'].max()}'\n
      Total Count of Director on 'Disney+' is '{disney_directors_movies['Number of Directors'].max()}'\n 
      ''')
      Accross All Platforms Total Count of Director is '28'

      Total Count of Director on 'Netflix' is '13'

      Total Count of Director on 'Hulu' is '8'

      Total Count of Director on 'Prime Video' is '28'

      Total Count of Director on 'Disney+' is '12'
 
      
In [46]:
f, ax = plt.subplots(1, 2 , figsize = (20, 5))
sns.distplot(df_movies_count_directors['Number of Directors'],bins = 20, kde = True, ax = ax[0])
sns.boxplot(df_movies_count_directors['Number of Directors'], ax = ax[1])
plt.show()
In [47]:
# Defining plot size and title
plt.figure(figsize = (20, 5))
plt.title('Number of Directors s Per Platform')
 
# Plotting the information from each dataset into a histogram
sns.histplot(prime_video_directors_movies['Number of Directors'], color = 'lightblue', legend = True, kde = True)
sns.histplot(netflix_directors_movies['Number of Directors'], color = 'red', legend = True, kde = True)
sns.histplot(hulu_directors_movies['Number of Directors'], color = 'lightgreen', legend = True, kde = True)
sns.histplot(disney_directors_movies['Number of Directors'], color = 'darkblue', legend = True, kde = True) 
 
# Setting the legend
plt.legend(['Prime Video', 'Netflix', 'Hulu', 'Disney+'])
plt.show()
In [48]:
df_lan = df_movies_director['Directors'].str.split(',').apply(pd.Series).stack()
del df_movies_director['Directors']
df_lan.index = df_lan.index.droplevel(-1)
df_lan.name = 'Director'
df_movies_director = df_movies_director.join(df_lan)
df_movies_director.drop_duplicates(inplace = True)
In [49]:
df_movies_director.head(5)
Out[49]:
ID Title Year Age IMDb Rotten Tomatoes Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Director
0 1 Inception 2010 13 8.8 87 Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot ... Action,Adventure,Sci-Fi,Thriller United States,United Kingdom English,Japanese,French Dom Cobb is a skilled thief, the absolute best... 148 movie 1 0 0 0 0 Netflix Christopher Nolan
1 2 The Matrix 1999 16 8.7 88 Keanu Reeves,Laurence Fishburne,Carrie-Anne Mo... Action,Sci-Fi United States English Thomas A. Anderson is a man living two lives. ... 136 movie 1 0 0 0 0 Netflix Lana Wachowski
1 2 The Matrix 1999 16 8.7 88 Keanu Reeves,Laurence Fishburne,Carrie-Anne Mo... Action,Sci-Fi United States English Thomas A. Anderson is a man living two lives. ... 136 movie 1 0 0 0 0 Netflix Lilly Wachowski
2 3 Avengers: Infinity War 2018 13 8.4 85 Robert Downey Jr.,Chris Hemsworth,Mark Ruffalo... Action,Adventure,Sci-Fi United States English As the Avengers and their allies have continue... 149 movie 1 0 0 0 0 Netflix Anthony Russo
2 3 Avengers: Infinity War 2018 13 8.4 85 Robert Downey Jr.,Chris Hemsworth,Mark Ruffalo... Action,Adventure,Sci-Fi United States English As the Avengers and their allies have continue... 149 movie 1 0 0 0 0 Netflix Joe Russo
In [50]:
director_count = df_movies_director.groupby('Director')['Title'].count()
director_movies = df_movies_director.groupby('Director')[['Netflix', 'Hulu', 'Prime Video', 'Disney+']].sum()
director_data_movies = pd.concat([director_count, director_movies], axis = 1).reset_index().rename(columns = {'Title' : 'Movies Count'})
director_data_movies = director_data_movies.sort_values(by = 'Movies Count', ascending = False)
In [51]:
# Director with Movies Counts - All Platforms Combined
director_data_movies.sort_values(by = 'Movies Count', ascending = False)[:10]
Out[51]:
Director Movies Count Netflix Hulu Prime Video Disney+
5226 Jay Chapman 36 12 0 29 0
6248 Joseph Kane 31 0 0 31 0
1985 Cheh Chang 29 2 0 28 0
10597 Sam Newfield 23 1 0 22 0
5621 Jim Wynorski 23 0 0 23 0
5107 Jan Suter 21 21 0 0 0
2792 David DeCoteau 21 0 0 21 0
9783 Raúl Campos 21 21 0 0 0
5232 Jay Karas 21 15 1 6 1
12460 William Beaudine 20 0 0 20 0
In [52]:
fig = px.bar(x = director_data_movies['Director'][:50],
             y = director_data_movies['Movies Count'][:50], 
             color = director_data_movies['Movies Count'][:50],
             color_continuous_scale = 'Teal_r', 
             labels = { 'x' : 'Director', 'y' : 'Movies Count'},
             title  = 'Major Directors : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [53]:
df_director_high_movies = director_data_movies.sort_values(by = 'Movies Count', ascending = False).reset_index()
df_director_high_movies = df_director_high_movies.drop(['index'], axis = 1)
# filter = (director_data_movies['Movies Count'] == (director_data_movies['Movies Count'].max()))
# df_director_high_movies = director_data_movies[filter]
 
# highest_rated_movies = director_data_movies.loc[director_data_movies['Movies Count'].idxmax()]
 
print('\nDirector with Highest Ever Movies Count are : All Platforms Combined\n')
df_director_high_movies.head(5)
Director with Highest Ever Movies Count are : All Platforms Combined

Out[53]:
Director Movies Count Netflix Hulu Prime Video Disney+
0 Jay Chapman 36 12 0 29 0
1 Joseph Kane 31 0 0 31 0
2 Cheh Chang 29 2 0 28 0
3 Sam Newfield 23 1 0 22 0
4 Jim Wynorski 23 0 0 23 0
In [54]:
fig = px.bar(y = df_director_high_movies['Director'][:15],
             x = df_director_high_movies['Movies Count'][:15], 
             color = df_director_high_movies['Movies Count'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Director', 'x' : 'Movies Count'},
             title  = 'Director with Highest Movies : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [55]:
df_director_low_movies = director_data_movies.sort_values(by = 'Movies Count', ascending = True).reset_index()
df_director_low_movies = df_director_low_movies.drop(['index'], axis = 1)
# filter = (director_data_movies['Movies Count'] == (director_data_movies['Movies Count'].min()))
# df_director_low_movies = director_data_movies[filter]

print('\nDirector with Lowest Ever Movies Count are : All Platforms Combined\n')
df_director_low_movies.head(5)
Director with Lowest Ever Movies Count are : All Platforms Combined

Out[55]:
Director Movies Count Netflix Hulu Prime Video Disney+
0 Peter Ettedgui 1 0 0 1 0
1 Dexton Deboree 1 0 1 0 0
2 Dezsö Magyar 1 0 0 1 0
3 Dhanush 1 1 0 1 0
4 Dheer Momaya 1 1 0 0 0
In [56]:
fig = px.bar(y = df_director_low_movies['Director'][:15],
             x = df_director_low_movies['Movies Count'][:15], 
             color = df_director_low_movies['Movies Count'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Director', 'x' : 'Movies Count'},
             title  = 'Director with Lowest Movies Count : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [57]:
print(f'''
      Total '{director_data_movies['Director'].unique().shape[0]}' unique Director Count s were Given, They were Like this,\n
      
      {director_data_movies.sort_values(by = 'Movies Count', ascending = False)['Director'].unique()[:5]}\n
 
      The Highest Ever Movies Count Ever Any Movie Got is '{df_director_high_movies['Director'][0]}' : '{df_director_high_movies['Movies Count'].max()}'\n
 
      The Lowest Ever Movies Count Ever Any Movie Got is '{df_director_low_movies['Director'][0]}' : '{df_director_low_movies['Movies Count'].min()}'\n
      ''')
      Total '12760' unique Director Count s were Given, They were Like this,

      
      ['Jay Chapman' 'Joseph Kane' 'Cheh Chang' 'Sam Newfield' 'Jim Wynorski']

 
      The Highest Ever Movies Count Ever Any Movie Got is 'Jay Chapman' : '36'

 
      The Lowest Ever Movies Count Ever Any Movie Got is 'Peter Ettedgui' : '1'

      
In [58]:
fig = px.pie(director_data_movies[:10], names = 'Director', values = 'Movies Count', color_discrete_sequence = px.colors.sequential.Teal)
fig.update_traces(textposition = 'inside', textinfo = 'percent+label', title = 'Movies Count based on Director')
fig.show()
In [59]:
# netflix_director_movies = director_data_movies[director_data_movies['Netflix'] !=  0].sort_values(by = 'Netflix', ascending = False).reset_index()
# netflix_director_movies = netflix_director_movies.drop(['index', 'Hulu', 'Prime Video', 'Disney+', 'Movies Count'], axis = 1)
 
netflix_director_high_movies = df_director_high_movies.sort_values(by = 'Netflix', ascending = False).reset_index()
netflix_director_high_movies = netflix_director_high_movies.drop(['index'], axis = 1)
 
netflix_director_low_movies = df_director_high_movies.sort_values(by = 'Netflix', ascending = True).reset_index()
netflix_director_low_movies = netflix_director_low_movies.drop(['index'], axis = 1)
 
netflix_director_high_movies.head(5)
Out[59]:
Director Movies Count Netflix Hulu Prime Video Disney+
0 Jan Suter 21 21 0 0 0
1 Raúl Campos 21 21 0 0 0
2 Marcus Raboy 18 16 0 2 0
3 Jay Karas 21 15 1 6 1
4 Jay Chapman 36 12 0 29 0
In [60]:
fig = px.bar(x = netflix_director_high_movies['Director'][:15],
             y = netflix_director_high_movies['Netflix'][:15], 
             color = netflix_director_high_movies['Netflix'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Director', 'x' : 'Movies Count'},
             title  = 'Director with Highest Movies : Netflix')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [61]:
# hulu_director_movies = director_data_movies[director_data_movies['Hulu'] !=  0].sort_values(by = 'Hulu', ascending = False).reset_index()
# hulu_director_movies = hulu_director_movies.drop(['index', 'Netflix', 'Prime Video', 'Disney+', 'Movies Count'], axis = 1)
 
hulu_director_high_movies = df_director_high_movies.sort_values(by = 'Hulu', ascending = False).reset_index()
hulu_director_high_movies = hulu_director_high_movies.drop(['index'], axis = 1)
 
hulu_director_low_movies = df_director_high_movies.sort_values(by = 'Hulu', ascending = True).reset_index()
hulu_director_low_movies = hulu_director_low_movies.drop(['index'], axis = 1)
 
hulu_director_high_movies.head(5)
Out[61]:
Director Movies Count Netflix Hulu Prime Video Disney+
0 Savage Steve Holland 6 2 5 0 1
1 Tyler Perry 6 2 4 4 0
2 Richard Rich 10 2 4 2 2
3 Alan Metter 4 1 3 1 0
4 William Lau 5 2 3 0 0
In [62]:
fig = px.bar(x = hulu_director_high_movies['Director'][:15],
             y = hulu_director_high_movies['Hulu'][:15], 
             color = hulu_director_high_movies['Hulu'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Director', 'x' : 'Movies Count'},
             title  = 'Director with Highest Movies : Hulu')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [63]:
# prime_video_director_movies = director_data_movies[director_data_movies['Prime Video'] !=  0].sort_values(by = 'Prime Video', ascending = False).reset_index()
# prime_video_director_movies = prime_video_director_movies.drop(['index', 'Netflix', 'Hulu', 'Disney+', 'Movies Count'], axis = 1)
 
prime_video_director_high_movies = df_director_high_movies.sort_values(by = 'Prime Video', ascending = False).reset_index()
prime_video_director_high_movies = prime_video_director_high_movies.drop(['index'], axis = 1)
 
prime_video_director_low_movies = df_director_high_movies.sort_values(by = 'Prime Video', ascending = True).reset_index()
prime_video_director_low_movies = prime_video_director_low_movies.drop(['index'], axis = 1)
 
prime_video_director_high_movies.head(5)
Out[63]:
Director Movies Count Netflix Hulu Prime Video Disney+
0 Joseph Kane 31 0 0 31 0
1 Jay Chapman 36 12 0 29 0
2 Cheh Chang 29 2 0 28 0
3 Jim Wynorski 23 0 0 23 0
4 Sam Newfield 23 1 0 22 0
In [64]:
fig = px.bar(x = prime_video_director_high_movies['Director'][:15],
             y = prime_video_director_high_movies['Prime Video'][:15], 
             color = prime_video_director_high_movies['Prime Video'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Director', 'x' : 'Movies Count'},
             title  = 'Director with Highest Movies : Prime Video')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [65]:
# disney_director_movies = director_data_movies[director_data_movies['Disney+'] !=  0].sort_values(by = 'Disney+', ascending = False).reset_index()
# disney_director_movies = disney_director_movies.drop(['index', 'Netflix', 'Hulu', 'Prime Video', 'Movies Count'], axis = 1)
 
disney_director_high_movies = df_director_high_movies.sort_values(by = 'Disney+', ascending = False).reset_index()
disney_director_high_movies = disney_director_high_movies.drop(['index'], axis = 1)
 
disney_director_low_movies = df_director_high_movies.sort_values(by = 'Disney+', ascending = True).reset_index()
disney_director_low_movies = disney_director_low_movies.drop(['index'], axis = 1)
 
disney_director_high_movies.head(5)
Out[65]:
Director Movies Count Netflix Hulu Prime Video Disney+
0 Paul Hoen 15 0 0 0 15
1 James Algar 12 0 0 0 12
2 Robert Stevenson 14 0 1 2 11
3 Vincent McEveety 9 0 0 0 9
4 Kenny Ortega 8 0 0 0 8
In [66]:
fig = px.bar(x = disney_director_high_movies['Director'][:15],
             y = disney_director_high_movies['Disney+'][:15], 
             color = disney_director_high_movies['Disney+'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Director', 'x' : 'Movies Count'},
             title  = 'Director with Highest Movies : Disney+')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [67]:
f, ax = plt.subplots(1, 2 , figsize = (20, 5))
sns.distplot(director_data_movies['Movies Count'], bins = 20, kde = True, ax = ax[0])
sns.boxplot(director_data_movies['Movies Count'], ax = ax[1])
plt.show()
In [68]:
# Creating distinct dataframes only with the movies present on individual streaming platforms
netflix_director_movies = director_data_movies[director_data_movies['Netflix'] !=  0].sort_values(by = 'Netflix', ascending = False).reset_index()
netflix_director_movies = netflix_director_movies.drop(['index', 'Hulu', 'Prime Video', 'Disney+', 'Movies Count'], axis = 1)

hulu_director_movies = director_data_movies[director_data_movies['Hulu'] !=  0].sort_values(by = 'Hulu', ascending = False).reset_index()
hulu_director_movies = hulu_director_movies.drop(['index', 'Netflix', 'Prime Video', 'Disney+', 'Movies Count'], axis = 1)

prime_video_director_movies = director_data_movies[director_data_movies['Prime Video'] !=  0].sort_values(by = 'Prime Video', ascending = False).reset_index()
prime_video_director_movies = prime_video_director_movies.drop(['index', 'Netflix', 'Hulu', 'Disney+', 'Movies Count'], axis = 1)

disney_director_movies = director_data_movies[director_data_movies['Disney+'] !=  0].sort_values(by = 'Disney+', ascending = False).reset_index()
disney_director_movies = disney_director_movies.drop(['index', 'Netflix', 'Hulu', 'Prime Video', 'Movies Count'], axis = 1)
In [69]:
# Defining plot size and title
plt.figure(figsize = (20, 5))
plt.title('Director Movies Count Per Platform')
 
# Plotting the information from each dataset into a histogram
 
sns.histplot(disney_director_movies['Disney+'][:50], color = 'darkblue', legend = True, kde = True)  
sns.histplot(prime_video_director_movies['Prime Video'][:50], color = 'lightblue', legend = True, kde = True)
sns.histplot(netflix_director_movies['Netflix'][:50], color = 'red', legend = True, kde = True)
sns.histplot(hulu_director_movies['Hulu'][:50], color = 'lightgreen', legend = True, kde = True)                                
 
# Setting the legend
plt.legend(['Disney+', 'Prime Video', 'Netflix', 'Hulu'])
plt.show()
In [70]:
print(f'''
      The Director with Highest Movies Count Ever Got is '{df_director_high_movies['Director'][0]}' : '{df_director_high_movies['Movies Count'].max()}'\n
      The Director with Lowest Movies Count Ever Got is '{df_director_low_movies['Director'][0]}' : '{df_director_low_movies['Movies Count'].min()}'\n
      
      The Director with Highest Movies Count on 'Netflix' is '{netflix_director_high_movies['Director'][0]}' : '{netflix_director_high_movies['Netflix'].max()}'\n
      The Director with Lowest Movies Count on 'Netflix' is '{netflix_director_low_movies['Director'][0]}' : '{netflix_director_low_movies['Netflix'].min()}'\n
      
      The Director with Highest Movies Count on 'Hulu' is '{hulu_director_high_movies['Director'][0]}' : '{hulu_director_high_movies['Hulu'].max()}'\n
      The Director with Lowest Movies Count on 'Hulu' is '{hulu_director_low_movies['Director'][0]}' : '{hulu_director_low_movies['Hulu'].min()}'\n
      
      The Director with Highest Movies Count on 'Prime Video' is '{prime_video_director_high_movies['Director'][0]}' : '{prime_video_director_high_movies['Prime Video'].max()}'\n
      The Director with Lowest Movies Count on 'Prime Video' is '{prime_video_director_low_movies['Director'][0]}' : '{prime_video_director_low_movies['Prime Video'].min()}'\n
      
      The Director with Highest Movies Count on 'Disney+' is '{disney_director_high_movies['Director'][0]}' : '{disney_director_high_movies['Disney+'].max()}'\n
      The Director with Lowest Movies Count on 'Disney+' is '{disney_director_low_movies['Director'][0]}' : '{disney_director_low_movies['Disney+'].min()}'\n 
      ''')
      The Director with Highest Movies Count Ever Got is 'Jay Chapman' : '36'

      The Director with Lowest Movies Count Ever Got is 'Peter Ettedgui' : '1'

      
      The Director with Highest Movies Count on 'Netflix' is 'Jan Suter' : '21'

      The Director with Lowest Movies Count on 'Netflix' is 'Jeff Rector' : '0'

      
      The Director with Highest Movies Count on 'Hulu' is 'Savage Steve Holland' : '5'

      The Director with Lowest Movies Count on 'Hulu' is 'Jay Chapman' : '0'

      
      The Director with Highest Movies Count on 'Prime Video' is 'Joseph Kane' : '31'

      The Director with Lowest Movies Count on 'Prime Video' is 'Özcan Alper' : '0'

      
      The Director with Highest Movies Count on 'Disney+' is 'Paul Hoen' : '15'

      The Director with Lowest Movies Count on 'Disney+' is 'Jay Chapman' : '0'
 
      
In [71]:
# Distribution of movies director in each platform
plt.figure(figsize = (20, 5))
plt.title('Director with Movies Count for All Platforms')
sns.violinplot(x = director_data_movies['Movies Count'][:100], color = 'gold', legend = True, kde = True, shade = False)
plt.show()
In [72]:
# Distribution of Director Movies Count in each platform
f1, ax1 = plt.subplots(1, 2 , figsize = (20, 5))
sns.violinplot(x = netflix_director_movies['Netflix'][:100], color = 'red', ax = ax1[0])
sns.violinplot(x = hulu_director_movies['Hulu'][:100], color = 'lightgreen', ax = ax1[1])
 
f2, ax2 = plt.subplots(1, 2 , figsize = (20, 5))
sns.violinplot(x = prime_video_director_movies['Prime Video'][:100], color = 'lightblue', ax = ax2[0])
sns.violinplot(x = disney_director_movies['Disney+'][:100], color = 'darkblue', ax = ax2[1])
plt.show()
In [73]:
print(f'''
      Accross All Platforms the Average Movies Count of Director is '{round(director_data_movies['Movies Count'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Director on 'Netflix' is '{round(netflix_director_movies['Netflix'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Director on 'Hulu' is '{round(hulu_director_movies['Hulu'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Director on 'Prime Video' is '{round(prime_video_director_movies['Prime Video'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Director on 'Disney+' is '{round(disney_director_movies['Disney+'].mean(), ndigits = 2)}'\n 
      ''')
      Accross All Platforms the Average Movies Count of Director is '1.48'

      The Average Movies Count of Director on 'Netflix' is '1.29'

      The Average Movies Count of Director on 'Hulu' is '1.09'

      The Average Movies Count of Director on 'Prime Video' is '1.37'

      The Average Movies Count of Director on 'Disney+' is '1.57'
 
      
In [74]:
print(f'''
      Accross All Platforms Total Count of Director is '{director_data_movies['Director'].unique().shape[0]}'\n
      Total Count of Director on 'Netflix' is '{netflix_director_movies['Director'].unique().shape[0]}'\n
      Total Count of Director on 'Hulu' is '{hulu_director_movies['Director'].unique().shape[0]}'\n
      Total Count of Director on 'Prime Video' is '{prime_video_director_movies['Director'].unique().shape[0]}'\n
      Total Count of Director on 'Disney+' is '{disney_director_movies['Director'].unique().shape[0]}'\n 
      ''')
      Accross All Platforms Total Count of Director is '12760'

      Total Count of Director on 'Netflix' is '3174'

      Total Count of Director on 'Hulu' is '1066'

      Total Count of Director on 'Prime Video' is '9909'

      Total Count of Director on 'Disney+' is '473'
 
      
In [75]:
plt.figure(figsize = (20, 5))
sns.lineplot(x = director_data_movies['Director'][:10], y = director_data_movies['Netflix'][:10], color = 'red')
sns.lineplot(x = director_data_movies['Director'][:10], y = director_data_movies['Hulu'][:10], color = 'lightgreen')
sns.lineplot(x = director_data_movies['Director'][:10], y = director_data_movies['Prime Video'][:10], color = 'lightblue')
sns.lineplot(x = director_data_movies['Director'][:10], y = director_data_movies['Disney+'][:10], color = 'darkblue')
plt.xlabel('Director', fontsize = 20)
plt.ylabel('Movies Count', fontsize = 20)
plt.show()
In [76]:
fig, axes = plt.subplots(2, 2, figsize = (20 , 10))
 
n_d_ax1 = sns.lineplot(y = director_data_movies['Director'][:10], x = director_data_movies['Netflix'][:10], color = 'red', ax = axes[0, 0])
h_d_ax2 = sns.lineplot(y = director_data_movies['Director'][:10], x = director_data_movies['Hulu'][:10], color = 'lightgreen', ax = axes[0, 1])
p_d_ax3 = sns.lineplot(y = director_data_movies['Director'][:10], x = director_data_movies['Prime Video'][:10], color = 'lightblue', ax = axes[1, 0])
d_d_ax4 = sns.lineplot(y = director_data_movies['Director'][:10], x = director_data_movies['Disney+'][:10], color = 'darkblue', ax = axes[1, 1])
 
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
 
n_d_ax1.title.set_text(labels[0])
h_d_ax2.title.set_text(labels[1])
p_d_ax3.title.set_text(labels[2])
d_d_ax4.title.set_text(labels[3])
 
plt.show()
In [77]:
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
 
n_d_ax1 = sns.barplot(y = netflix_director_movies['Director'][:10], x = netflix_director_movies['Netflix'][:10], palette = 'Reds_r', ax = axes[0, 0])
h_d_ax2 = sns.barplot(y = hulu_director_movies['Director'][:10], x = hulu_director_movies['Hulu'][:10], palette = 'Greens_r', ax = axes[0, 1])
p_d_ax3 = sns.barplot(y = prime_video_director_movies['Director'][:10], x = prime_video_director_movies['Prime Video'][:10], palette = 'Blues_r', ax = axes[1, 0])
d_d_ax4 = sns.barplot(y = disney_director_movies['Director'][:10], x = disney_director_movies['Disney+'][:10], palette = 'BuPu_r', ax = axes[1, 1])
 
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
 
n_d_ax1.title.set_text(labels[0])
h_d_ax2.title.set_text(labels[1])
p_d_ax3.title.set_text(labels[2])
d_d_ax4.title.set_text(labels[3])
 
plt.show()
In [78]:
# Defining plot size and title
plt.figure(figsize = (20, 5))
plt.title('Director  Movies Count Per Platform')
 
# Plotting the information from each dataset into a histogram
sns.kdeplot(netflix_director_movies['Netflix'][:10], color = 'red', legend = True)
sns.kdeplot(hulu_director_movies['Hulu'][:10], color = 'green', legend = True)
sns.kdeplot(prime_video_director_movies['Prime Video'][:10], color = 'lightblue', legend = True)
sns.kdeplot(disney_director_movies['Disney+'][:10], color = 'darkblue', legend = True)                                      
                                      
# Setting the legend
plt.legend(['Netflix', 'Hulu', 'Prime Video', 'Disney+'])
plt.show()
In [79]:
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
 
n_d_ax1 = sns.barplot(y = director_data_movies['Director'][:10], x = director_data_movies['Netflix'][:10], palette = 'Reds_r', ax = axes[0, 0])
h_d_ax2 = sns.barplot(y = director_data_movies['Director'][:10], x = director_data_movies['Hulu'][:10], palette = 'Greens_r', ax = axes[0, 1])
p_d_ax3 = sns.barplot(y = director_data_movies['Director'][:10], x = director_data_movies['Prime Video'][:10], palette = 'Blues_r', ax = axes[1, 0])
d_d_ax4 = sns.barplot(y = director_data_movies['Director'][:10], x = director_data_movies['Disney+'][:10], palette = 'BuPu_r', ax = axes[1, 1])
 
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
 
n_d_ax1.title.set_text(labels[0])
h_d_ax2.title.set_text(labels[1])
p_d_ax3.title.set_text(labels[2])
d_d_ax4.title.set_text(labels[3])
 
plt.show()
In [80]:
df_movies_mixed_directors.drop(df_movies_mixed_directors.loc[df_movies_mixed_directors['Directors'] == "NA"].index, inplace = True)
# df_movies_mixed_directors = df_movies_mixed_directors[df_movies_mixed_directors.Director != "NA"]
df_movies_mixed_directors.drop(df_movies_mixed_directors.loc[df_movies_mixed_directors['Number of Directors'] == 1].index, inplace = True)
In [81]:
df_movies_mixed_directors.head(5)
Out[81]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Number of Directors
1 2 The Matrix 1999 16 8.7 88 Lana Wachowski,Lilly Wachowski Keanu Reeves,Laurence Fishburne,Carrie-Anne Mo... Action,Sci-Fi United States ... Thomas A. Anderson is a man living two lives. ... 136 movie 1 0 0 0 0 Netflix 2
2 3 Avengers: Infinity War 2018 13 8.4 85 Anthony Russo,Joe Russo Robert Downey Jr.,Chris Hemsworth,Mark Ruffalo... Action,Adventure,Sci-Fi United States ... As the Avengers and their allies have continue... 149 movie 1 0 0 0 0 Netflix 2
5 6 Spider-Man: Into the Spider-Verse 2018 7 8.4 97 Bob Persichetti,Peter Ramsey,Rodney Rothman Shameik Moore,Jake Johnson,Hailee Steinfeld,Ma... Animation,Action,Adventure,Family,Sci-Fi United States ... Phil Lord and Christopher Miller, the creative... 117 movie 1 0 0 0 0 Netflix 3
14 15 Monty Python and the Holy Grail 1975 7 8.2 97 Terry Gilliam,Terry Jones Graham Chapman,John Cleese,Eric Idle,Terry Gil... Adventure,Comedy,Fantasy United Kingdom ... History is turned on its comic head when, in t... 91 movie 1 0 0 0 0 Netflix 2
35 36 Klaus 2019 7 8.2 94 Sergio Pablos,Carlos Martínez López Jason Schwartzman,J.K. Simmons,Rashida Jones,W... Animation,Adventure,Comedy,Family Spain,United Kingdom,United States ... When Jesper (Jason Schwartzman) distinguishes ... 96 movie 1 0 0 0 0 Netflix 2

5 rows × 21 columns

In [82]:
mixed_directors_count = df_movies_mixed_directors.groupby('Directors')['Title'].count()
mixed_directors_movies = df_movies_mixed_directors.groupby('Directors')[['Netflix', 'Hulu', 'Prime Video', 'Disney+']].sum()
mixed_directors_data_movies = pd.concat([mixed_directors_count, mixed_directors_movies], axis = 1).reset_index().rename(columns = {'Title' : 'Movies Count', 'Directors' : 'Mixed Director'})
mixed_directors_data_movies = mixed_directors_data_movies.sort_values(by = 'Movies Count', ascending = False)
In [83]:
mixed_directors_data_movies.head(5)
Out[83]:
Mixed Director Movies Count Netflix Hulu Prime Video Disney+
1174 Raúl Campos,Jan Suter 20 20 0 0 0
11 Abbas Alibhai Burmawalla,Mastan Alibhai Burmaw... 7 2 0 6 0
467 Ethan Coen,Joel Coen 6 4 1 2 0
882 Lana Wachowski,Lilly Wachowski 5 4 0 1 0
455 Elizabeth Banks,Steven Brill,Steve Carr,Rusty ... 5 2 0 4 0
In [84]:
# Mixed Director with Movies Counts - All Platforms Combined
mixed_directors_data_movies.sort_values(by = 'Movies Count', ascending = False)[:10]
Out[84]:
Mixed Director Movies Count Netflix Hulu Prime Video Disney+
1174 Raúl Campos,Jan Suter 20 20 0 0 0
11 Abbas Alibhai Burmawalla,Mastan Alibhai Burmaw... 7 2 0 6 0
467 Ethan Coen,Joel Coen 6 4 1 2 0
882 Lana Wachowski,Lilly Wachowski 5 4 0 1 0
455 Elizabeth Banks,Steven Brill,Steve Carr,Rusty ... 5 2 0 4 0
117 Anthony Russo,Joe Russo 5 1 0 1 3
485 Frank Capra,Anatole Litvak 5 2 0 5 0
447 Eduardo Quiroz,Jose Quiroz 4 0 0 4 0
476 Fenton Bailey,Randy Barbato 4 0 0 4 0
1520 Zack Coffman,Scott Di Lalla 4 0 0 4 0
In [85]:
df_mixed_directors_high_movies = mixed_directors_data_movies.sort_values(by = 'Movies Count', ascending = False).reset_index()
df_mixed_directors_high_movies = df_mixed_directors_high_movies.drop(['index'], axis = 1)
# filter = (mixed_directors_data_movies['Movies Count'] = =  (mixed_directors_data_movies['Movies Count'].max()))
# df_mixed_directors_high_movies = mixed_directors_data_movies[filter]
 
# highest_rated_movies = mixed_directors_data_movies.loc[mixed_directors_data_movies['Movies Count'].idxmax()]
 
print('\nMixed Director with Highest Ever Movies Count are : All Platforms Combined\n')
df_mixed_directors_high_movies.head(5)
Mixed Director with Highest Ever Movies Count are : All Platforms Combined

Out[85]:
Mixed Director Movies Count Netflix Hulu Prime Video Disney+
0 Raúl Campos,Jan Suter 20 20 0 0 0
1 Abbas Alibhai Burmawalla,Mastan Alibhai Burmaw... 7 2 0 6 0
2 Ethan Coen,Joel Coen 6 4 1 2 0
3 Lana Wachowski,Lilly Wachowski 5 4 0 1 0
4 Elizabeth Banks,Steven Brill,Steve Carr,Rusty ... 5 2 0 4 0
In [86]:
fig = px.bar(y = df_mixed_directors_high_movies['Mixed Director'][:15],
             x = df_mixed_directors_high_movies['Movies Count'][:15], 
             color = df_mixed_directors_high_movies['Movies Count'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Number of Mixed Director'},
             title  = 'Movies with Highest Number of Mixed Directors : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [87]:
df_mixed_directors_low_movies = mixed_directors_data_movies.sort_values(by = 'Movies Count', ascending = True).reset_index()
df_mixed_directors_low_movies = df_mixed_directors_low_movies.drop(['index'], axis = 1)
# filter = (mixed_directors_data_movies['Movies Count'] = =  (mixed_directors_data_movies['Movies Count'].min()))
# df_mixed_directors_low_movies = mixed_directors_data_movies[filter]
 
print('\nMixed Director with Lowest Ever Movies Count are : All Platforms Combined\n')
df_mixed_directors_low_movies.head(5)
Mixed Director with Lowest Ever Movies Count are : All Platforms Combined

Out[87]:
Mixed Director Movies Count Netflix Hulu Prime Video Disney+
0 Robin Budd,Donovan Cook 1 0 0 0 1
1 Andrew Stanton,Lee Unkrich 1 0 0 0 1
2 Alex Harvey,Tommy Sowards 1 0 0 1 0
3 Alex Kleider,Corey Ogilvie 1 0 0 1 0
4 Amber Dawn Lee,Rob Brownstein,Jeff Chassler,Ro... 1 0 0 1 0
In [88]:
fig = px.bar(y = df_mixed_directors_low_movies['Mixed Director'][:15],
             x = df_mixed_directors_low_movies['Movies Count'][:15], 
             color = df_mixed_directors_low_movies['Movies Count'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Number of Mixed Director'},
             title  = 'Movies with Lowest Number of Mixed Directors : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [89]:
print(f'''
      Total '{df_movies_directors['Directors'].count()}' Titles are available on All Platforms, out of which\n
      You Can Choose to see Movies from Total '{mixed_directors_data_movies['Mixed Director'].unique().shape[0]}' Mixed Director, They were Like this, \n
 
      {mixed_directors_data_movies.sort_values(by = 'Movies Count', ascending = False)['Mixed Director'].head(5).unique()} etc. \n
 
      The Mixed Director with Highest Movies Count have '{mixed_directors_data_movies['Movies Count'].max()}' Movies Available is '{df_mixed_directors_high_movies['Mixed Director'][0]}', &\n
      The Mixed Director with Lowest Movies Count have '{mixed_directors_data_movies['Movies Count'].min()}' Movies Available is '{df_mixed_directors_low_movies['Mixed Director'][0]}'
      ''')
      Total '16566' Titles are available on All Platforms, out of which

      You Can Choose to see Movies from Total '1527' Mixed Director, They were Like this, 

 
      ['Raúl Campos,Jan Suter'
 'Abbas Alibhai Burmawalla,Mastan Alibhai Burmawalla'
 'Ethan Coen,Joel Coen' 'Lana Wachowski,Lilly Wachowski'
 'Elizabeth Banks,Steven Brill,Steve Carr,Rusty Cundieff,James Duffy,Griffin Dunne,Peter Farrelly,Patrik Forsberg,Will Graham,James Gunn,Brett Ratner,Jonathan van Tulleken,Bob Odenkirk'] etc. 

 
      The Mixed Director with Highest Movies Count have '20' Movies Available is 'Raúl Campos,Jan Suter', &

      The Mixed Director with Lowest Movies Count have '1' Movies Available is 'Robin Budd,Donovan Cook'
      
In [90]:
fig = px.pie(mixed_directors_data_movies[:4], names = 'Mixed Director', values = 'Movies Count', color_discrete_sequence = px.colors.sequential.Teal)
fig.update_traces(textposition = 'inside', textinfo = 'percent+label', title = 'Movies Count based on Mixed Director')
fig.show()
In [91]:
# netflix_mixed_directors_movies = mixed_directors_data_movies[mixed_directors_data_movies['Netflix'] !=  0].sort_values(by = 'Netflix', ascending = False).reset_index()
# netflix_mixed_directors_movies = netflix_mixed_directors_movies.drop(['index', 'Hulu', 'Prime Video', 'Disney+', 'Movies Count'], axis = 1)
 
netflix_mixed_directors_high_movies = df_mixed_directors_high_movies.sort_values(by = 'Netflix', ascending = False).reset_index()
netflix_mixed_directors_high_movies = netflix_mixed_directors_high_movies.drop(['index'], axis = 1)
 
netflix_mixed_directors_low_movies = df_mixed_directors_high_movies.sort_values(by = 'Netflix', ascending = True).reset_index()
netflix_mixed_directors_low_movies = netflix_mixed_directors_low_movies.drop(['index'], axis = 1)
 
netflix_mixed_directors_high_movies.head(5)
Out[91]:
Mixed Director Movies Count Netflix Hulu Prime Video Disney+
0 Raúl Campos,Jan Suter 20 20 0 0 0
1 Ethan Coen,Joel Coen 6 4 1 2 0
2 Lana Wachowski,Lilly Wachowski 5 4 0 1 0
3 Nate Adams,Adam Carolla 3 3 0 0 0
4 Michael Simon,Matthew McNeil 3 3 0 1 0
In [92]:
# hulu_mixed_directors_movies = mixed_directors_data_movies[mixed_directors_data_movies['Hulu'] !=  0].sort_values(by = 'Hulu', ascending = False).reset_index()
# hulu_mixed_directors_movies = hulu_mixed_directors_movies.drop(['index', 'Netflix', 'Prime Video', 'Disney+', 'Movies Count'], axis = 1)
 
hulu_mixed_directors_high_movies = df_mixed_directors_high_movies.sort_values(by = 'Hulu', ascending = False).reset_index()
hulu_mixed_directors_high_movies = hulu_mixed_directors_high_movies.drop(['index'], axis = 1)
 
hulu_mixed_directors_low_movies = df_mixed_directors_high_movies.sort_values(by = 'Hulu', ascending = True).reset_index()
hulu_mixed_directors_low_movies = hulu_mixed_directors_low_movies.drop(['index'], axis = 1)
 
hulu_mixed_directors_high_movies.head(5)
Out[92]:
Mixed Director Movies Count Netflix Hulu Prime Video Disney+
0 Don Argott,Sheena M. Joyce 2 0 2 0 0
1 Jocelyn DeBoer,Dawn Luebbe 1 0 1 0 0
2 Stuart Walker,Mitchell Leisen 1 0 1 1 0
3 Alexander Lahl,Max Mönch 1 0 1 0 0
4 Steve Jones,Todd Jones 1 0 1 1 0
In [93]:
# prime_video_mixed_directors_movies = mixed_directors_data_movies[mixed_directors_data_movies['Prime Video'] !=  0].sort_values(by = 'Prime Video', ascending = False).reset_index()
# prime_video_mixed_directors_movies = prime_video_mixed_directors_movies.drop(['index', 'Netflix', 'Hulu', 'Disney+', 'Movies Count'], axis = 1)
 
prime_video_mixed_directors_high_movies = df_mixed_directors_high_movies.sort_values(by = 'Prime Video', ascending = False).reset_index()
prime_video_mixed_directors_high_movies = prime_video_mixed_directors_high_movies.drop(['index'], axis = 1)
 
prime_video_mixed_directors_low_movies = df_mixed_directors_high_movies.sort_values(by = 'Prime Video', ascending = True).reset_index()
prime_video_mixed_directors_low_movies = prime_video_mixed_directors_low_movies.drop(['index'], axis = 1)
 
prime_video_mixed_directors_high_movies.head(5)
Out[93]:
Mixed Director Movies Count Netflix Hulu Prime Video Disney+
0 Abbas Alibhai Burmawalla,Mastan Alibhai Burmaw... 7 2 0 6 0
1 Frank Capra,Anatole Litvak 5 2 0 5 0
2 Elizabeth Banks,Steven Brill,Steve Carr,Rusty ... 5 2 0 4 0
3 Eduardo Quiroz,Jose Quiroz 4 0 0 4 0
4 Fenton Bailey,Randy Barbato 4 0 0 4 0
In [94]:
# disney_mixed_directors_movies = mixed_directors_data_movies[mixed_directors_data_movies['Disney+'] !=  0].sort_values(by = 'Disney+', ascending = False).reset_index()
# disney_mixed_directors_movies = disney_mixed_directors_movies.drop(['index', 'Netflix', 'Hulu', 'Prime Video', 'Movies Count'], axis = 1)
 
disney_mixed_directors_high_movies = df_mixed_directors_high_movies.sort_values(by = 'Disney+', ascending = False).reset_index()
disney_mixed_directors_high_movies = disney_mixed_directors_high_movies.drop(['index'], axis = 1)
 
disney_mixed_directors_low_movies = df_mixed_directors_high_movies.sort_values(by = 'Disney+', ascending = True).reset_index()
disney_mixed_directors_low_movies = disney_mixed_directors_low_movies.drop(['index'], axis = 1)
 
disney_mixed_directors_high_movies.head(5)
Out[94]:
Mixed Director Movies Count Netflix Hulu Prime Video Disney+
0 Anthony Russo,Joe Russo 5 1 0 1 3
1 Ron Clements,John Musker 3 1 0 0 3
2 Robert C. Ramirez,Patrick A. Ventura 2 0 0 0 2
3 Chris Buck,Jennifer Lee 2 0 0 0 2
4 Saul Blinkoff,Elliot M. Bour 2 0 0 0 2
In [95]:
f, ax = plt.subplots(1, 2 , figsize = (20, 5))
sns.distplot(mixed_directors_data_movies['Movies Count'], bins = 20, kde = True, ax = ax[0])
sns.boxplot(mixed_directors_data_movies['Movies Count'], ax = ax[1])
plt.show()
In [96]:
# Creating distinct dataframes only with the movies present on individual streaming platforms
netflix_mixed_directors_movies = mixed_directors_data_movies[mixed_directors_data_movies['Netflix'] !=  0].sort_values(by = 'Netflix', ascending = False).reset_index()
netflix_mixed_directors_movies = netflix_mixed_directors_movies.drop(['index', 'Hulu', 'Prime Video', 'Disney+', 'Movies Count'], axis = 1)

hulu_mixed_directors_movies = mixed_directors_data_movies[mixed_directors_data_movies['Hulu'] !=  0].sort_values(by = 'Hulu', ascending = False).reset_index()
hulu_mixed_directors_movies = hulu_mixed_directors_movies.drop(['index', 'Netflix', 'Prime Video', 'Disney+', 'Movies Count'], axis = 1)

prime_video_mixed_directors_movies = mixed_directors_data_movies[mixed_directors_data_movies['Prime Video'] !=  0].sort_values(by = 'Prime Video', ascending = False).reset_index()
prime_video_mixed_directors_movies = prime_video_mixed_directors_movies.drop(['index', 'Netflix', 'Hulu', 'Disney+', 'Movies Count'], axis = 1)

disney_mixed_directors_movies = mixed_directors_data_movies[mixed_directors_data_movies['Disney+'] !=  0].sort_values(by = 'Disney+', ascending = False).reset_index()
disney_mixed_directors_movies = disney_mixed_directors_movies.drop(['index', 'Netflix', 'Hulu', 'Prime Video', 'Movies Count'], axis = 1)
In [97]:
# Defining plot size and title
plt.figure(figsize = (20, 5))
plt.title('Mixed Director Movies Count Per Platform')
 
# Plotting the information from each dataset into a histogram
 
sns.histplot(prime_video_mixed_directors_movies['Prime Video'][:100], color = 'lightblue', legend = True, kde = True)
sns.histplot(netflix_mixed_directors_movies['Netflix'][:100], color = 'red', legend = True, kde = True)
sns.histplot(hulu_mixed_directors_movies['Hulu'][:100], color = 'lightgreen', legend = True, kde = True)
sns.histplot(disney_mixed_directors_movies['Disney+'][:100], color = 'darkblue', legend = True, kde = True)                                
 
# Setting the legend
plt.legend(['Prime Video', 'Netflix', 'Hulu', 'Disney+'])
plt.show()
In [98]:
print(f'''
      The Mixed Director with Highest Movies Count Ever Got is '{df_mixed_directors_high_movies['Mixed Director'][0]}' : '{df_mixed_directors_high_movies['Movies Count'].max()}'\n
      The Mixed Director with Lowest Movies Count Ever Got is '{df_mixed_directors_low_movies['Mixed Director'][0]}' : '{df_mixed_directors_low_movies['Movies Count'].min()}'\n
      
      The Mixed Director with Highest Movies Count on 'Netflix' is '{netflix_mixed_directors_high_movies['Mixed Director'][0]}' : '{netflix_mixed_directors_high_movies['Netflix'].max()}'\n
      The Mixed Director with Lowest Movies Count on 'Netflix' is '{netflix_mixed_directors_low_movies['Mixed Director'][0]}' : '{netflix_mixed_directors_low_movies['Netflix'].min()}'\n
      
      The Mixed Director with Highest Movies Count on 'Hulu' is '{hulu_mixed_directors_high_movies['Mixed Director'][0]}' : '{hulu_mixed_directors_high_movies['Hulu'].max()}'\n
      The Mixed Director with Lowest Movies Count on 'Hulu' is '{hulu_mixed_directors_low_movies['Mixed Director'][0]}' : '{hulu_mixed_directors_low_movies['Hulu'].min()}'\n
      
      The Mixed Director with Highest Movies Count on 'Prime Video' is '{prime_video_mixed_directors_high_movies['Mixed Director'][0]}' : '{prime_video_mixed_directors_high_movies['Prime Video'].max()}'\n
      The Mixed Director with Lowest Movies Count on 'Prime Video' is '{prime_video_mixed_directors_low_movies['Mixed Director'][0]}' : '{prime_video_mixed_directors_low_movies['Prime Video'].min()}'\n
      
      The Mixed Director with Highest Movies Count on 'Disney+' is '{disney_mixed_directors_high_movies['Mixed Director'][0]}' : '{disney_mixed_directors_high_movies['Disney+'].max()}'\n
      The Mixed Director with Lowest Movies Count on 'Disney+' is '{disney_mixed_directors_low_movies['Mixed Director'][0]}' : '{disney_mixed_directors_low_movies['Disney+'].min()}'\n 
      ''')
      The Mixed Director with Highest Movies Count Ever Got is 'Raúl Campos,Jan Suter' : '20'

      The Mixed Director with Lowest Movies Count Ever Got is 'Robin Budd,Donovan Cook' : '1'

      
      The Mixed Director with Highest Movies Count on 'Netflix' is 'Raúl Campos,Jan Suter' : '20'

      The Mixed Director with Lowest Movies Count on 'Netflix' is 'Jim Kammerud,Brian Smith,Bill Speers' : '0'

      
      The Mixed Director with Highest Movies Count on 'Hulu' is 'Don Argott,Sheena M. Joyce' : '2'

      The Mixed Director with Lowest Movies Count on 'Hulu' is 'Raúl Campos,Jan Suter' : '0'

      
      The Mixed Director with Highest Movies Count on 'Prime Video' is 'Abbas Alibhai Burmawalla,Mastan Alibhai Burmawalla' : '6'

      The Mixed Director with Lowest Movies Count on 'Prime Video' is 'Raúl Campos,Jan Suter' : '0'

      
      The Mixed Director with Highest Movies Count on 'Disney+' is 'Anthony Russo,Joe Russo' : '3'

      The Mixed Director with Lowest Movies Count on 'Disney+' is 'Raúl Campos,Jan Suter' : '0'
 
      
In [99]:
print(f'''
      Accross All Platforms the Average Movies Count of Mixed Director is '{round(mixed_directors_data_movies['Movies Count'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Mixed Director on 'Netflix' is '{round(netflix_mixed_directors_movies['Netflix'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Mixed Director on 'Hulu' is '{round(hulu_mixed_directors_movies['Hulu'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Mixed Director on 'Prime Video' is '{round(prime_video_mixed_directors_movies['Prime Video'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Mixed Director on 'Disney+' is '{round(disney_mixed_directors_movies['Disney+'].mean(), ndigits = 2)}'\n 
      ''')
      Accross All Platforms the Average Movies Count of Mixed Director is '1.1'

      The Average Movies Count of Mixed Director on 'Netflix' is '1.12'

      The Average Movies Count of Mixed Director on 'Hulu' is '1.01'

      The Average Movies Count of Mixed Director on 'Prime Video' is '1.07'

      The Average Movies Count of Mixed Director on 'Disney+' is '1.1'
 
      
In [100]:
print(f'''
      Accross All Platforms Total Count of Mixed Director is '{mixed_directors_data_movies['Mixed Director'].unique().shape[0]}'\n
      Total Count of Mixed Director on 'Netflix' is '{netflix_mixed_directors_movies['Mixed Director'].unique().shape[0]}'\n
      Total Count of Mixed Director on 'Hulu' is '{hulu_mixed_directors_movies['Mixed Director'].unique().shape[0]}'\n
      Total Count of Mixed Director on 'Prime Video' is '{prime_video_mixed_directors_movies['Mixed Director'].unique().shape[0]}'\n
      Total Count of Mixed Director on 'Disney+' is '{disney_mixed_directors_movies['Mixed Director'].unique().shape[0]}'\n 
      ''')
      Accross All Platforms Total Count of Mixed Director is '1527'

      Total Count of Mixed Director on 'Netflix' is '373'

      Total Count of Mixed Director on 'Hulu' is '93'

      Total Count of Mixed Director on 'Prime Video' is '1047'

      Total Count of Mixed Director on 'Disney+' is '104'
 
      
In [101]:
plt.figure(figsize = (20, 5))
sns.lineplot(x = mixed_directors_data_movies['Mixed Director'][:5], y = mixed_directors_data_movies['Netflix'][:5], color = 'red')
sns.lineplot(x = mixed_directors_data_movies['Mixed Director'][:5], y = mixed_directors_data_movies['Hulu'][:5], color = 'lightgreen')
sns.lineplot(x = mixed_directors_data_movies['Mixed Director'][:5], y = mixed_directors_data_movies['Prime Video'][:5], color = 'lightblue')
sns.lineplot(x = mixed_directors_data_movies['Mixed Director'][:5], y = mixed_directors_data_movies['Disney+'][:5], color = 'darkblue')
plt.xlabel('Mixed Director', fontsize = 15)
plt.ylabel('Movies Count', fontsize = 15)
plt.show()
In [102]:
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
 
n_d_ax1 = sns.barplot(x = mixed_directors_data_movies['Mixed Director'][:10], y = mixed_directors_data_movies['Netflix'][:10], palette = 'Reds_r', ax = axes[0, 0])
h_d_ax2 = sns.barplot(x = mixed_directors_data_movies['Mixed Director'][:10], y = mixed_directors_data_movies['Hulu'][:10], palette = 'Greens_r', ax = axes[0, 1])
p_d_ax3 = sns.barplot(x = mixed_directors_data_movies['Mixed Director'][:10], y = mixed_directors_data_movies['Prime Video'][:10], palette = 'Blues_r', ax = axes[1, 0])
d_d_ax4 = sns.barplot(x = mixed_directors_data_movies['Mixed Director'][:10], y = mixed_directors_data_movies['Disney+'][:10], palette = 'BuPu_r', ax = axes[1, 1])
 
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
 
n_d_ax1.title.set_text(labels[0])
h_d_ax2.title.set_text(labels[1])
p_d_ax3.title.set_text(labels[2])
d_d_ax4.title.set_text(labels[3])
 
plt.show()
In [103]:
fig, axes = plt.subplots(2, 2, figsize = (20 , 10))
 
n_md_ax1 = sns.lineplot(x = mixed_directors_data_movies['Mixed Director'][:10], y = mixed_directors_data_movies['Netflix'][:10], color = 'red', ax = axes[0, 0])
h_md_ax2 = sns.lineplot(x = mixed_directors_data_movies['Mixed Director'][:10], y = mixed_directors_data_movies['Hulu'][:10], color = 'lightgreen', ax = axes[0, 1])
p_md_ax3 = sns.lineplot(x = mixed_directors_data_movies['Mixed Director'][:10], y = mixed_directors_data_movies['Prime Video'][:10], color = 'lightblue', ax = axes[1, 0])
d_md_ax4 = sns.lineplot(x = mixed_directors_data_movies['Mixed Director'][:10], y = mixed_directors_data_movies['Disney+'][:10], color = 'darkblue', ax = axes[1, 1])
 
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
 
n_md_ax1.title.set_text(labels[0])
h_md_ax2.title.set_text(labels[1])
p_md_ax3.title.set_text(labels[2])
d_md_ax4.title.set_text(labels[3])
 
plt.show()
In [104]:
# Defining plot size and title
plt.figure(figsize = (20, 5))
plt.title('Mixed Director  Movies Count Per Platform')
 
# Plotting the information from each dataset into a histogram
sns.kdeplot(netflix_mixed_directors_movies['Netflix'][:50], color = 'red', legend = True)
sns.kdeplot(hulu_mixed_directors_movies['Hulu'][:50], color = 'green', legend = True)
sns.kdeplot(prime_video_mixed_directors_movies['Prime Video'][:50], color = 'lightblue', legend = True)
sns.kdeplot(disney_mixed_directors_movies['Disney+'][:50], color = 'darkblue', legend = True)                                      

# Setting the legend
plt.legend(['Netflix', 'Hulu', 'Prime Video', 'Disney+'])
plt.show()
In [105]:
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
 
n_md_ax1 = sns.barplot(x = netflix_mixed_directors_movies['Mixed Director'][:10], y = netflix_mixed_directors_movies['Netflix'][:10], palette = 'Reds_r', ax = axes[0, 0])
h_md_ax2 = sns.barplot(x = hulu_mixed_directors_movies['Mixed Director'][:10], y = hulu_mixed_directors_movies['Hulu'][:10], palette = 'Greens_r', ax = axes[0, 1])
p_md_ax3 = sns.barplot(x = prime_video_mixed_directors_movies['Mixed Director'][:10], y = prime_video_mixed_directors_movies['Prime Video'][:10], palette = 'Blues_r', ax = axes[1, 0])
d_md_ax4 = sns.barplot(x = disney_mixed_directors_movies['Mixed Director'][:10], y = disney_mixed_directors_movies['Disney+'][:10], palette = 'BuPu_r', ax = axes[1, 1])
 
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
 
n_md_ax1.title.set_text(labels[0])
h_md_ax2.title.set_text(labels[1])
p_md_ax3.title.set_text(labels[2])
d_md_ax4.title.set_text(labels[3])
 
plt.show()
In [106]:
fig = go.Figure(go.Funnel(y = mixed_directors_data_movies['Mixed Director'][:10], x = mixed_directors_data_movies['Movies Count'][:10]))
fig.show()